This R markdown file summarizes Simulation Study results.

rm(list=ls())  # clean up workspace
setwd("/Users/xji3/GitFolders/EDN_ECP/SimulationStudy/")

source("./ReadInEDNECP.R")

09152017

Show the PSJS estimated results for Simulated datasets with estimated tau

realized.tract.dist <- function(L, p){
  x <- 1:(L-1)
  dist.1 <- (2 + (L-1-x)*p)/(1.0/p + L -1.)*(1-p)^(x-1)
  dist.L <- 1/p/(1.0/p + L -1.)*(1-p)^(L-1)
  dist <- c(dist.1, dist.L)
  mean.L <- sum(x * dist.1) + L*dist.L
  var.L <- sum(x^2 * dist.1) + L^2*dist.L - mean.L^2
  return(list(dist = dist, mean = mean.L, sd = sqrt(var.L)))
}
Tract.list <- c(3.0, 10.0, 50.0)
for(tract in Tract.list){
  target_summary <- get(paste("PSJS_HKY_Tract_", toString(tract), "_combined_summary", sep = ""))
  col.names <- target_summary["tract_length", ] < 10*tract
  sim_info <- get(paste("sim.tract.", toString(tract), sep = ""))
  
  hist(target_summary["tract_length", col.names], breaks = 50,
       main = paste("PSJS Estimated Tract length 1/p, Tract = ", toString(tract), ".0 ", sep = ""))
  abline(v =  realized.tract.dist(471, 1.0/tract)$mean, col = "blue")
  abline(v =  tract, col = 2)
  abline(v =  mean(sim_info["mean subtract length", ]), col = "green")
  
  hist(-log(target_summary["tract_length", col.names]), breaks = 50,
       main = paste("PSJS Estimated ln(p), Tract = ", toString(tract), ".0", sep = ""))
  abline(v = log(1.0 / realized.tract.dist(471, 1.0/tract)$mean), col = "blue")
  abline(v = log(1.0 / tract), col = 2)
  abline(v = log(1.0 / mean(sim_info["mean subtract length", ])), col = "green")
  
  
  hist((1/target_summary["tract_length", col.names]), breaks = 50,
       main = paste("PSJS Estimated p, Tract = ", toString(tract), ".0", sep = ""))
  abline(v = 1.0 / tract, col = 2)
  abline(v = 1.0 / realized.tract.dist(471, 1.0/tract)$mean, col = "blue")
  abline(v = 1.0 / mean(sim_info["mean subtract length", ]), col = "green")
  
  plot(sim_info["num IGC", ], sim_info["mean potential tract length", ], 
       main = paste("Simulated potential tract length, Tract = ", toString(tract), sep = ""), 
       xlab = "number of IGC events", ylab = "mean potential tract length")
  abline(h = tract, col = "red")
  abline(h = mean(sim_info["mean subtract length", ]), col = "green")
  
  plot(sim_info["num IGC", ], sim_info["mean realized tract length", ], 
       main = paste("Simulated realized tract length, Tract = ", toString(tract), sep = ""),
       xlab = "number of IGC events", ylab = "mean realized tract length")
  abline(h = realized.tract.dist(471, 1/tract)$mean, col = "red")
  abline(h = mean(sim_info["mean subtract length", ]), col = "green")
  
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], target_summary["tract_length", col.names],
       main = paste("PSJS estimate of Tract ", toString(tract), sep = ""),
       xlab = "number of IGC events", ylab = "PSJS estimated tract length")
  abline(h = tract, col = "red")
  abline(h = realized.tract.dist(471, 1/tract)$mean, col = "blue")
  abline(h = mean(sim_info["mean subtract length", ]), col = "green")
  
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], target_summary["tract_length", col.names],
       main = paste("PSJS estimate of Tract ", toString(tract), sep = ""),
       xlab = "number of IGC events", ylab = "PSJS estimated tract length")
  abline(h = tract, col = "red")
  abline(h = realized.tract.dist(471, 1/tract)$mean, col = "blue")
  abline(h = mean(sim_info["mean subtract length", ]), col = "green")
  
  plot(sim_info["num IGC with at least two variant sites", colnames(target_summary)[col.names]], 
       target_summary["tract_length", col.names],
       main = paste(" PSJS estimated vs num IGC >1 variant sites - Tract ", toString(tract), sep = ""),
       xlab = "num IGC with at least two variant sites", ylab = "PSJS estimated tract length")
  abline(h = tract, col = 2)
  abline(h = mean(sim_info["mean subtract length", ]), col = "green")
  
  # Now plot kappa estimates
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], 
       target_summary["kappa", col.names],
       main = paste("PSJS kappa, Tract = ", toString(tract), sep = ""),
       xlab = "Num IGC", ylab = "PSJS estimated kappa")
  abline(h = 2.1148820530158794, col = 2)
  
  # Now plot r2 estimates
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], 
       target_summary["r2", col.names],
       main = paste("PSJS r2, Tract = ", toString(tract), sep = ""),
       xlab = "Num IGC", ylab = "PSJS estimated r2")
  abline(h = 1.5189901654372941, col = 2)
  
  # Now plot r3 estimates
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], 
       target_summary["r3", col.names],
       main = paste("PSJS r3, Tract = ", toString(tract), sep = ""),
       xlab = "Num IGC", ylab = "PSJS estimated r3")
  abline(h = 1.5578589357425792, col = 2)
  
  # Now plot initiation rate estimates
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], 
       target_summary["init_rate", col.names],
       main = paste("PSJS init_rate, Tract = ", toString(tract), sep = ""),
       xlab = "Num IGC", ylab = "PSJS estimated init_rate")
  abline(h = 2.4307123664566772 / tract, col = 2)
  
  # Now plot tract p estimates
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], 
       1.0/target_summary["tract_length", col.names],
       main = paste("PSJS tract_p, Tract = ", toString(tract), sep = ""),
       xlab = "Num IGC", ylab = "PSJS estimated tract_p")
  abline(h = 1.0 / tract, col = 2)
  
  # Now plot Tau estimates
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], 
       target_summary["init_rate", col.names]*target_summary["tract_length", col.names],
       main = paste("PSJS Tau, Tract = ", toString(tract), sep = ""),
       xlab = "Num IGC", ylab = "PSJS estimated Tau")
  abline(h = 2.4307123664566772, col = 2) 
  
  plot(sim_info["num IGC", colnames(target_summary)[col.names]], 
       target_summary["init_rate", col.names]*target_summary["tract_length", col.names]*3.0/(1.0 +target_summary["r2", col.names] + target_summary["r3", col.names]),
       main = paste("PSJS Weighted Tau, Tract = ", toString(tract), sep = ""),
       xlab = "Num IGC", ylab = "PSJS Tau*3/(1+r2+r3)")
  abline(h = 1.7886698571354107, col = 2) 
  
  # Now plot initiation rate vs tract length
  plot(target_summary["tract_length", col.names], 
       target_summary["init_rate", col.names],
       main = paste("PSJS init_rate, Tract = ", toString(tract), sep = ""),
       xlab = "Tract length", ylab = "initiation rate")
  lines(sort(target_summary["tract_length", col.names]), 2.4307123664566772/sort(target_summary["tract_length", col.names]), 
        col = "red", type = "l")  
  
  plot(sim_info["mean realized tract length", colnames(target_summary)[col.names]], 
       target_summary["tract_length", col.names],
       main = paste(" mean realized vs PSJS estimated Tract ", toString(tract), sep = ""),
       xlab = "mean realized IGC tract length", ylab = "PSJS estimated")
  abline(a= 0.0, b = 1.0, col = "red") 
  
  plot(sim_info["mean potential tract length", colnames(target_summary)[col.names]], 
       target_summary["tract_length", col.names],
       main = paste(" mean potential vs PSJS estimated Tract ", toString(tract), sep = ""),
       xlab = "mean potential IGC tract length", ylab = "PSJS estimated")
  abline(a = 0.0, b = 1.0, col = 2) 
  
  plot(sim_info["mean subtract length", colnames(target_summary)[col.names]], 
       target_summary["tract_length", col.names],
       main = paste(" mean longest variant pair length vs PSJS estimated Tract ", toString(tract), sep = ""),
       xlab = "mean longest variant pair length", ylab = "PSJS estimated")
  abline(a = 0.0, b = 1.0, col = "green") 
  
  
  print(paste("Tract = ", toString(tract), ".0 combined PSJS HKY Results", sep = ""))
  print(matrix(c("Total samples", sum(col.names),
                 "mean estimates", mean(target_summary["tract_length", col.names]), 
                 "sd estimates", sd(target_summary["tract_length", col.names]),
                 "mean longest variant pair", mean(sim_info["mean subtract length", col.names]),
                 "sd longest variant pair", sd(sim_info["mean subtract length", col.names])), 2, 5))  
  
}

## [1] "Tract = 3.0 combined PSJS HKY Results"
##      [,1]            [,2]               [,3]              
## [1,] "Total samples" "mean estimates"   "sd estimates"    
## [2,] "100"           "2.96975048272621" "1.51845925735691"
##      [,4]                        [,5]                     
## [1,] "mean longest variant pair" "sd longest variant pair"
## [2,] "1.56708704522834"          "0.298332278063919"

## [1] "Tract = 10.0 combined PSJS HKY Results"
##      [,1]            [,2]               [,3]             
## [1,] "Total samples" "mean estimates"   "sd estimates"   
## [2,] "100"           "8.97899383424202" "4.2631365454197"
##      [,4]                        [,5]                     
## [1,] "mean longest variant pair" "sd longest variant pair"
## [2,] "5.80636428287317"          "2.03422876841587"

## [1] "Tract = 50.0 combined PSJS HKY Results"
##      [,1]            [,2]               [,3]              
## [1,] "Total samples" "mean estimates"   "sd estimates"    
## [2,] "100"           "32.7592161296646" "24.4441490372758"
##      [,4]                        [,5]                     
## [1,] "mean longest variant pair" "sd longest variant pair"
## [2,] "38.5288401875902"          "23.5188708743709"

save workspace now

save.image("./SimulationStudy.RData")